Plan of the course

  1. Introduction to Forecasting
  2. Regression Analysis and Forecasting
  3. Exponential Smoothing Methods
  4. Univariate Time Series Models
  5. Trend and Seasonality
  6. Forecasting Volatility

Setup

# Chunk options: include all chunk code and output in the rendered document.
knitr::opts_chunk$set(include = TRUE)
# install.packages('pacman')  # Package Management Tool
#install.packages('fpp3', dependencies = TRUE)
# NOTE(review): rm(list=ls()) and setwd() in scripts are discouraged — they
# clobber the user's session and hard-code a machine-specific path. Prefer a
# fresh R session and here::here() (loaded below) for portable paths.
rm(list=ls())
library(here)
## here() starts at /Users/miguelportela/Documents/GitHub/R_Training
library(pacman)
start_time <- Sys.time()  # record start time to measure total running time
setwd("/Users/miguelportela/Documents/GitHub/R_Training/timeseries/Code")
# setwd('D:\\miguel\\Dropbox\\1.miguel\\1.formacao\\r\\forecasting\\2.Lecture_Code\\_2021')
# setwd('D:\\miguel\\Dropbox\\Modelação e Análise de Dados com R - Módulo 8\\Code')
# setwd('C:\\Users\\mangelo.EEG\\Documents\\GitHub\\R_Training\\timeseries\\Code')
getwd()
## [1] "/Users/miguelportela/Documents/GitHub/R_Training/timeseries/Code"
# CHECK: `fpp3` package https://pkg.robjhyndman.com/fpp3-package/

# Load (installing if missing) every package used below, in one call.
# FIX: the CRAN package is named 'AER' (Applied Econometrics with R);
# lower-case 'aer' does not exist, so the original p_load failed — the
# warnings below even suggest "Perhaps you meant 'AER' ?".
pacman::p_load(here,
        tidyverse,
        haven,
        #xlsx,
        #readxl,
        ggplot2,
        AER,
        fpp3,
        TSA,
        forecast,
        fma,
        expsmooth,
        lmtest,
        tseries,
        gdata,
        foreign,
        pacman,
        urca)
## Warning: package 'aer' is not available for this version of R
## 
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
## Warning: Perhaps you meant 'AER' ?
## Warning in p_install(package, character.only = TRUE, ...):
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'aer'
## Warning in pacman::p_load(here, tidyverse, haven, ggplot2, aer, fpp3, TSA, : Failed to install/load:
## aer

Lecture 1.a

# data: gold, tempdub, sales, google, oil.price, wages, larain
# data("beer",package="fma")
# `beer` is presumably available via lazy-loading once fma is attached by
# p_load above, which would be why the data() call is commented out — confirm.
plot(beer,type = "l")

Lecture 1.b

data(wages) # ?TSA (Cryer, J. D. Time Series Analysis, Duxbury Press, 1986.)

plot(wages,type = "l")

# Read the first sheet of the Excel file with gdata::read.xls.
# NOTE(review): read.xls converts via Perl and primarily targets .xls;
# confirm it handles this .xlsx, or switch to readxl::read_excel.
mydata <- read.xls("sunspot.xlsx", 1)
# print(mydata)
#mydata2 = read.xls("sunspot.xlsx", sheet = 1, header = TRUE)
# Sunspots: a temporary phenomenon on the Sun's photosphere that appears darker than the surrounding areas
# Sunspots appear on an 11-year solar cycle, so we should expect to see a seasonality component to the data

sunspot <- as.ts(mydata$Sunspot)
# Write the plot to an EPS file, then close the device so the file is
# finalised. FIX: the original never called dev.off(), leaving the
# postscript device open — the EPS stayed incomplete and the second plot
# was silently drawn into it instead of on screen.
postscript(file="sunspot.eps", paper="special", width=10, height=7, horizontal=FALSE) 
plot(sunspot ~ Year, mydata, type = "l")
dev.off()
# Repeat the plot on the default (screen) device.
plot(sunspot ~ Year, mydata, type = "l")

Lecture 2.a

# data("beer", package = "fma")
plot(beer,type = "l")

Forecasts include 80% and 95% prediction intervals, set via level = c(80, 95)

# Four benchmark forecasting methods (forecast package), h = 12 steps ahead.

# Mean method: every forecast equals the mean of the observed series.
beerfor1 <- meanf(beer, h=12)
plot(beerfor1, main="mean")

# Naive method: every forecast equals the last observation.
beerfor2 <- naive(beer, h=12)
plot(beerfor2, main="naive")

# Seasonal naive: forecast equals the last observed value from the same season.
beerfor3 <- snaive(beer, h=12)
plot(beerfor3, main="snaive")

# Random walk with drift: last value plus the average historical change.
beerfor4 <- rwf(beer, drift=TRUE, h=12)
plot(beerfor4, main="drift")

#plot(forecast(beerfor4,level=c(80,95)))
summary(beerfor1)
## 
## Forecast method: Mean
## 
## Model Information:
## $mu
## [1] 149.3036
## 
## $mu.se
## [1] 2.619596
## 
## $sd
## [1] 19.60326
## 
## $bootstrap
## [1] FALSE
## 
## $call
## meanf(y = beer, h = 12)
## 
## attr(,"class")
## [1] "meanf"
## 
## Error measures:
##                       ME     RMSE     MAE       MPE     MAPE     MASE      ACF1
## Training set 1.21808e-14 19.42745 15.3361 -1.586682 10.12517 1.670268 0.4207789
## 
## Forecasts:
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Sep 1995       149.3036 123.6495 174.9577 109.6685 188.9386
## Oct 1995       149.3036 123.6495 174.9577 109.6685 188.9386
## Nov 1995       149.3036 123.6495 174.9577 109.6685 188.9386
## Dec 1995       149.3036 123.6495 174.9577 109.6685 188.9386
## Jan 1996       149.3036 123.6495 174.9577 109.6685 188.9386
## Feb 1996       149.3036 123.6495 174.9577 109.6685 188.9386
## Mar 1996       149.3036 123.6495 174.9577 109.6685 188.9386
## Apr 1996       149.3036 123.6495 174.9577 109.6685 188.9386
## May 1996       149.3036 123.6495 174.9577 109.6685 188.9386
## Jun 1996       149.3036 123.6495 174.9577 109.6685 188.9386
## Jul 1996       149.3036 123.6495 174.9577 109.6685 188.9386
## Aug 1996       149.3036 123.6495 174.9577 109.6685 188.9386
summary(beerfor2)
## 
## Forecast method: Naive method
## 
## Model Information:
## Call: naive(y = beer, h = 12) 
## 
## Residual sd: 21 
## 
## Error measures:
##                ME RMSE      MAE       MPE     MAPE     MASE      ACF1
## Training set -0.2   21 16.16364 -1.088528 10.90629 1.760396 -0.172757
## 
## Forecasts:
##          Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## Sep 1995            153 126.08742 179.9126 111.84076 194.1592
## Oct 1995            153 114.93986 191.0601  94.79204 211.2080
## Nov 1995            153 106.38604 199.6140  81.71010 224.2899
## Dec 1995            153  99.17483 206.8252  70.68151 235.3185
## Jan 1996            153  92.82164 213.1784  60.96513 245.0349
## Feb 1996            153  87.07790 218.9221  52.18085 253.8191
## Mar 1996            153  81.79600 224.2040  44.10288 261.8971
## Apr 1996            153  76.87972 229.1203  36.58408 269.4159
## May 1996            153  72.26225 233.7377  29.52227 276.4777
## Jun 1996            153  67.89494 238.1051  22.84304 283.1570
## Jul 1996            153  63.74106 242.2589  16.49023 289.5098
## Aug 1996            153  59.77208 246.2279  10.42020 295.5798
# Compare training-set error measures across the four benchmark methods;
# lower RMSE/MAE/MASE indicates a better in-sample fit.
accuracy(beerfor1)
##                       ME     RMSE     MAE       MPE     MAPE     MASE      ACF1
## Training set 1.21808e-14 19.42745 15.3361 -1.586682 10.12517 1.670268 0.4207789
accuracy(beerfor2)
##                ME RMSE      MAE       MPE     MAPE     MASE      ACF1
## Training set -0.2   21 16.16364 -1.088528 10.90629 1.760396 -0.172757
accuracy(beerfor3)
##                     ME     RMSE      MAE       MPE     MAPE MASE      ACF1
## Training set -2.681818 11.43956 9.181818 -2.064328 6.332352    1 -0.141297
accuracy(beerfor4)
##                         ME     RMSE      MAE      MPE     MAPE     MASE
## Training set -9.560219e-15 20.99905 16.18909 -0.95219 10.91464 1.763168
##                   ACF1
## Training set -0.172757

Lecture 2.b

# data("dj",package = "fma")
plot(dj)

Use the first 250 observations as the in-sample (training) period

# Training sample: observations up to index 250 of the Dow Jones series.
dj1 <- window(dj, end=250)
plot(dj1)

#Out-of-sample period from 251 until 292

# Hold-out (test) sample: observations from 251 onwards.
dj2 <- window(dj, start=251)
#80% and 95% forecast interval: level=c(80,95)
djfor1 <- meanf(dj1, h=42)
plot(djfor1, main="mean")

djfor2 <- naive(dj1, h=42)
plot(djfor2, main="naive")

# NOTE(review): snaive coincides with naive here — their accuracy output
# below is identical, consistent with dj being a frequency-1 series; confirm.
djfor3 <- snaive(dj1, h=42)
plot(djfor3, main="snaive")

djfor4 <- rwf(dj1, drift=TRUE, h=42)
plot(djfor4, main="drift")

Measures of forecast accuracy on the hold-out sample

accuracy(meanf(dj1,h=42), dj2)
##                        ME      RMSE       MAE         MPE     MAPE     MASE
## Training set 6.553874e-14  98.71439  80.56688 -0.06934572 2.151962 4.920567
## Test set     1.424185e+02 148.23574 142.41848  3.66304611 3.663046 8.698111
##                   ACF1 Theil's U
## Training set 0.9719593        NA
## Test set     0.8255136  6.072223
accuracy(naive(dj1,h=42), dj2)
##                      ME     RMSE      MAE        MPE      MAPE     MASE
## Training set  0.7188755 22.00014 16.37349 0.01749683 0.4380973 1.000000
## Test set     46.4404762 62.02846 54.44048 1.18683463 1.3979371 3.324915
##                    ACF1 Theil's U
## Training set 0.02446257        NA
## Test set     0.82551365   2.54582
accuracy(snaive(dj1,h=42), dj2)
##                      ME     RMSE      MAE        MPE      MAPE     MASE
## Training set  0.7188755 22.00014 16.37349 0.01749683 0.4380973 1.000000
## Test set     46.4404762 62.02846 54.44048 1.18683463 1.3979371 3.324915
##                    ACF1 Theil's U
## Training set 0.02446257        NA
## Test set     0.82551365   2.54582
accuracy(rwf(dj1,drift=TRUE,h=42), dj2)
##                        ME     RMSE      MAE          MPE      MAPE      MASE
## Training set 1.278395e-13 21.98839 16.34525 -0.001766862 0.4373707 0.9982752
## Test set     3.098465e+01 53.69767 45.72743  0.787547945 1.1757748 2.7927719
##                    ACF1 Theil's U
## Training set 0.02446257        NA
## Test set     0.83881869  2.203742

Lecture 2.c

# Wooldridge house-price data (Stata format). The download is commented out,
# so the script assumes 'hprice1.dta' already sits in the working directory.
# NOTE(review): the URL is plain http — prefer https if the server supports it.
# download.file('http://fmwww.bc.edu/ec-p/data/wooldridge/hprice1.dta','hprice1.dta',mode='wb')
hprice <- read.dta('hprice1.dta')  # foreign::read.dta reads older Stata files

Some statistics

  summary(hprice)
##      price           assess          bdrms          lotsize          sqrft     
##  Min.   :111.0   Min.   :198.7   Min.   :2.000   Min.   : 1000   Min.   :1171  
##  1st Qu.:230.0   1st Qu.:253.9   1st Qu.:3.000   1st Qu.: 5733   1st Qu.:1660  
##  Median :265.5   Median :290.2   Median :3.000   Median : 6430   Median :1845  
##  Mean   :293.5   Mean   :315.7   Mean   :3.568   Mean   : 9020   Mean   :2014  
##  3rd Qu.:326.2   3rd Qu.:352.1   3rd Qu.:4.000   3rd Qu.: 8583   3rd Qu.:2227  
##  Max.   :725.0   Max.   :708.6   Max.   :7.000   Max.   :92681   Max.   :3880  
##     colonial          lprice         lassess         llotsize     
##  Min.   :0.0000   Min.   :4.710   Min.   :5.292   Min.   : 6.908  
##  1st Qu.:0.0000   1st Qu.:5.438   1st Qu.:5.537   1st Qu.: 8.654  
##  Median :1.0000   Median :5.582   Median :5.671   Median : 8.769  
##  Mean   :0.6932   Mean   :5.633   Mean   :5.718   Mean   : 8.905  
##  3rd Qu.:1.0000   3rd Qu.:5.788   3rd Qu.:5.864   3rd Qu.: 9.058  
##  Max.   :1.0000   Max.   :6.586   Max.   :6.563   Max.   :11.437  
##      lsqrft     
##  Min.   :7.066  
##  1st Qu.:7.415  
##  Median :7.520  
##  Mean   :7.573  
##  3rd Qu.:7.708  
##  Max.   :8.264
  cor(hprice)
##              price     assess     bdrms    lotsize      sqrft   colonial
## price    1.0000000 0.90527938 0.5080835 0.34712448 0.78790655 0.13794578
## assess   0.9052794 1.00000000 0.4824739 0.32814633 0.86563447 0.08293582
## bdrms    0.5080835 0.48247394 1.0000000 0.13632563 0.53147364 0.30457549
## lotsize  0.3471245 0.32814633 0.1363256 1.00000000 0.18384224 0.01401865
## sqrft    0.7879065 0.86563447 0.5314736 0.18384224 1.00000000 0.06542126
## colonial 0.1379458 0.08293582 0.3045755 0.01401865 0.06542126 1.00000000
## lprice   0.9665014 0.86823970 0.4634897 0.32455387 0.76400002 0.18051612
## lassess  0.8731444 0.98296161 0.4587439 0.30990708 0.86621376 0.10997230
## llotsize 0.5287804 0.57166539 0.1694902 0.80785523 0.33860720 0.03864210
## lsqrft   0.7503026 0.84718487 0.5195793 0.16487038 0.98581609 0.10628626
##             lprice   lassess  llotsize    lsqrft
## price    0.9665014 0.8731444 0.5287804 0.7503026
## assess   0.8682397 0.9829616 0.5716654 0.8471849
## bdrms    0.4634897 0.4587439 0.1694902 0.5195793
## lotsize  0.3245539 0.3099071 0.8078552 0.1648704
## sqrft    0.7640000 0.8662138 0.3386072 0.9858161
## colonial 0.1805161 0.1099723 0.0386421 0.1062863
## lprice   1.0000000 0.8750037 0.5041425 0.7436335
## lassess  0.8750037 1.0000000 0.5577345 0.8646644
## llotsize 0.5041425 0.5577345 1.0000000 0.3112993
## lsqrft   0.7436335 0.8646644 0.3112993 1.0000000
  cor(hprice$price,hprice$sqrft)
## [1] 0.7879065

OLS estimation single regression

# Simple OLS regression of house price on square footage.
lm.r <- lm(price ~ sqrft, data = hprice)
summary(lm.r)
## 
## Call:
## lm(formula = price ~ sqrft, data = hprice)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -117.112  -36.348   -6.503   31.701  235.253 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 11.20415   24.74261   0.453    0.652    
## sqrft        0.14021    0.01182  11.866   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 63.62 on 86 degrees of freedom
## Multiple R-squared:  0.6208, Adjusted R-squared:  0.6164 
## F-statistic: 140.8 on 1 and 86 DF,  p-value: < 2.2e-16

Residuals

resid(lm.r) #lm.r$res 
##            1            2            3            4            5            6 
##  -53.0385077   67.7178661  -12.8540278  -19.2296402    9.3054580   68.9298174 
##            7            8            9           10           11           12 
##   31.4797649   61.0906533  -52.9569419  -36.2028921  -53.7369880  -80.5198592 
##           13           14           15           16           17           18 
##  -79.4161934  -65.4647909  -70.3719245  -30.5754712  -51.6260996   25.0615812 
##           19           20           21           22           23           24 
##   63.8655502   41.5087116  -32.3562265  -39.0122608  -34.0971529 -104.0495577 
##           25           26           27           28           29           30 
##   33.8920477   -7.0917532  -52.0917532   36.5115368  -28.5086839   51.2231405 
##           31           32           33           34           35           36 
##   -1.7560123  -72.8609998   11.2519620  -34.1923433   60.1199759  -59.8432284 
##           37           38           39           40           41           42 
##  -36.7843326   19.7772631  -62.1542899   14.5560562    1.7652107  235.2530896 
##           43           44           45           46           47           48 
##    4.6064766   54.6088005   42.2920840    6.4636910  -86.3081302 -117.1117233 
##           49           50           51           52           53           54 
##   26.4317938   74.1302742  -23.7001923   36.5640305    1.3787989  -85.2664359 
##           55           56           57           58           59           60 
##   -2.6474479  -10.7796846  -36.9173210  -12.5944530    7.8897237  -23.7184648 
##           61           62           63           64           65           66 
##   -5.9147465   21.4847887  104.4634404   93.1137274   -1.6338233  113.9192969 
##           67           68           69           70           71           72 
##   -0.2081666    5.7896347  -66.0356825  -13.7189660  -33.7215406   13.4317938 
##           73           74           75           76           77           78 
##  200.3432561  -24.6104016  -14.3693500  203.1989671   68.9980375   12.2946586 
##           79           80           81           82           83           84 
##  -35.5309093   32.3628503 -115.4279952  -20.6968801  -60.2450876   26.2282897 
##           85           86           87           88 
##  -15.6659711  -29.3962233   41.6458469  -17.9384188

Predicted values

fitted(lm.r) #lm.r$fit 
##        1        2        3        4        5        6        7        8 
## 353.0385 302.2821 203.8540 214.2296 363.6945 397.3452 301.0202 253.9093 
##        9       10       11       12       13       14       15       16 
## 258.9569 276.2029 338.7370 380.5199 484.4162 277.4648 335.3719 257.9755 
##       17       18       19       20       21       22       23       24 
## 291.6261 259.9384 204.1344 268.4913 298.3562 309.0123 259.0972 254.0496 
##       25       26       27       28       29       30       31       32 
## 213.1080 282.0918 282.0918 306.4885 506.0087 298.7769 231.7560 407.8610 
##       33       34       35       36       37       38       39       40 
## 239.7480 269.1923 300.8800 249.8432 396.7843 555.2227 271.1553 210.4439 
##       41       42       43       44       45       46       47       48 
## 244.2348 478.2469 243.3935 175.3912 332.7079 258.5363 399.3081 534.6117 
##       49       50       51       52       53       54       55       56 
## 226.5682 240.8697 287.7002 218.4360 208.6212 265.2664 252.6474 260.7797 
##       57       58       59       60       61       62       63       64 
## 245.9173 270.5945 281.1103 339.7185 230.9147 244.5152 205.5366 378.1363 
##       65       66       67       68       69       70       71       72 
## 336.6338 381.0807 279.7082 374.2104 391.0357 233.7190 248.7215 226.5682 
##       73       74       75       76       77       78       79       80 
## 524.6567 254.6104 320.3693 221.8010 249.0020 317.7053 281.5309 192.6371 
##       81       82       83       84       85       86       87       88 
## 226.4280 288.8219 304.2451 268.7717 251.6660 231.8962 177.3542 259.9384

Scatterplot with fitted line

# Scatterplot with the fitted OLS line overlaid.
plot(price ~ sqrft, hprice)
abline(lm.r) 

# Residuals against the regressor: patterns here would suggest
# misspecification or heteroskedasticity.
plot(lm.r$res ~ sqrft, hprice)

Regression with logs between lprice and lsqrft

# Log-log regression of price on square footage (slope = elasticity).
lm.r4 <- lm(lprice ~ lsqrft, data = hprice)
summary(lm.r4)
## 
## Call:
## lm(formula = lprice ~ lsqrft, data = hprice)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71742 -0.12714 -0.01056  0.12371  0.64410 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.9751     0.6411  -1.521    0.132    
## lsqrft        0.8727     0.0846  10.315   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2041 on 86 degrees of freedom
## Multiple R-squared:  0.553,  Adjusted R-squared:  0.5478 
## F-statistic: 106.4 on 1 and 86 DF,  p-value: < 2.2e-16
plot(lm.r4$res ~ lsqrft, hprice)

plot(lprice ~ lsqrft, hprice)
abline(lm.r4)

Regression with logs between lprice and lassess

# Log-log regression of price on assessed value (slope = elasticity).
lm.r5 <- lm(lprice ~ lassess, data = hprice)
summary(lm.r5)
## 
## Call:
## lm(formula = lprice ~ lassess, data = hprice)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51044 -0.08257 -0.00496  0.08033  0.60799 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.16147    0.34607  -0.467    0.642    
## lassess      1.01341    0.06046  16.761   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1478 on 86 degrees of freedom
## Multiple R-squared:  0.7656, Adjusted R-squared:  0.7629 
## F-statistic: 280.9 on 1 and 86 DF,  p-value: < 2.2e-16
plot(lm.r5$res ~ lassess, hprice)

plot(lprice ~ lassess, hprice)
abline(lm.r5)

Confidence intervals for the coefficients

confint(lm.r5,level=0.95)
##                  2.5 %    97.5 %
## (Intercept) -0.8494464 0.5264978
## lassess      0.8932147 1.1335992

Prediction interval for a single observation

plot(lprice ~ lassess, hprice)
abline(lm.r5)

newdata = data.frame(lassess=250)

Interval estimate on the mean at a specific point

con.1 <- predict(lm.r5, newdata, level = 0.90, interval="confidence")
# lines(newdata,con.1[,c("lwr","upr")],col="red",lty=1,type="l")
newdata = data.frame(lassess=250)

Interval estimate on a single future observation

pred.1 <- predict(lm.r5, newdata, level = 0.90, interval="predict")
# lines(newdata,pred.1[,c("lwr","upr")],col="blue",lty=1,type="l")

Plot prediction interval

# Fitted line plus 95% prediction band over the observed data.
conf.2 <- predict(lm.r5, level = 0.95, interval="confidence")
pred.2 <- predict(lm.r5, level = 0.95, interval="predict")
## Warning in predict.lm(lm.r5, level = 0.95, interval = "predict"): predictions on current data refer to _future_ responses
fitted <- pred.2[,1]       # point predictions
pred.lower <- pred.2[,2]   # lower 95% prediction bound
pred.upper <- pred.2[,3]   # upper 95% prediction bound
# FIX: lines() joins points in the order supplied; hprice$lassess is not
# sorted, so the original drew zig-zags. Order by the x variable first.
ord <- order(hprice$lassess)
plot(lprice ~ lassess, hprice)
lines(hprice$lassess[ord],fitted[ord],col="red",lwd=2)
lines(hprice$lassess[ord],pred.lower[ord],lwd=1,col="blue")
lines(hprice$lassess[ord],pred.upper[ord],lwd=1,col="blue")

OLS estimation multiple regression

lm.r2 <- lm(price ~ sqrft + bdrms, data = hprice)
summary(lm.r2)
## 
## Call:
## lm(formula = price ~ sqrft + bdrms, data = hprice)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -127.627  -42.876   -7.051   32.589  229.003 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -19.31500   31.04662  -0.622    0.536    
## sqrft         0.12844    0.01382   9.291 1.39e-14 ***
## bdrms        15.19819    9.48352   1.603    0.113    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 63.04 on 85 degrees of freedom
## Multiple R-squared:  0.6319, Adjusted R-squared:  0.6233 
## F-statistic: 72.96 on 2 and 85 DF,  p-value: < 2.2e-16

OLS estimation multiple regression

lm.r3 <- lm(lprice ~ lsqrft + llotsize, data = hprice)
summary(lm.r3)
## 
## Call:
## lm(formula = lprice ~ lsqrft + llotsize, data = hprice)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.65332 -0.11051 -0.00648  0.11822  0.66417 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.64007    0.60188  -2.725  0.00781 ** 
## lsqrft       0.76237    0.08089   9.425 7.44e-15 ***
## llotsize     0.16846    0.03846   4.380 3.37e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1855 on 85 degrees of freedom
## Multiple R-squared:  0.6353, Adjusted R-squared:  0.6267 
## F-statistic: 74.04 on 2 and 85 DF,  p-value: < 2.2e-16

Lecture 2.d

# Wooldridge wage data: download in binary mode, then read the Stata file.
# NOTE(review): plain-http download — prefer https if the server supports it.
download.file('http://fmwww.bc.edu/ec-p/data/wooldridge/wage1.dta','wage1.dta',mode='wb')
wage <- read.dta('wage1.dta')
# Log-wage regression on education and experience.
fit.r <- lm(lwage ~ educ + exper, data = wage)
summary(fit.r)
## 
## Call:
## lm(formula = lwage ~ educ + exper, data = wage)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.05800 -0.30136 -0.04539  0.30601  1.44425 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.216854   0.108595   1.997   0.0464 *  
## educ        0.097936   0.007622  12.848  < 2e-16 ***
## exper       0.010347   0.001555   6.653 7.24e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4614 on 523 degrees of freedom
## Multiple R-squared:  0.2493, Adjusted R-squared:  0.2465 
## F-statistic: 86.86 on 2 and 523 DF,  p-value: < 2.2e-16
# Residuals against education — check for systematic patterns.
plot(fit.r$res ~ educ, wage)

plot(lwage ~ educ, wage)
# abline() uses only the first two coefficients (intercept and educ slope),
# as the warning below notes; the fitted model also includes exper, so this
# line is not the full model fit.
abline(fit.r)
## Warning in abline(fit.r): only using the first two of 3 regression coefficients

Coefficients

fit.r$coef #(or coef(fit.r))
## (Intercept)        educ       exper 
##  0.21685433  0.09793557  0.01034695

Residuals

# fit.r$res 

Predicted values

# fit.r$fit 

Prediction interval for a single observation

newdata = data.frame(educ=12, exper=5)

Interval estimate on the mean at a specific point

predict(fit.r, newdata, level = 0.95, interval="confidence")
##        fit      lwr      upr
## 1 1.443816 1.387547 1.500085

Interval estimate on a single future observation

predict(fit.r, newdata, level = 0.95, interval="predict")
##        fit       lwr      upr
## 1 1.443816 0.5356328 2.351999

Prediction interval for several observations

# Prediction intervals for three new (educ, exper) profiles at once.
neweduc=c(12,15,16)
newexper=c(5,7,10)
predict(fit.r,data.frame(educ = neweduc,exper=newexper), 
        level = 0.95, interval = "predict")
##        fit       lwr      upr
## 1 1.443816 0.5356328 2.351999
## 2 1.758317 0.8501361 2.666497
## 3 1.887293 0.9786455 2.795940

Lecture 3.0

data(elecsales, package = "fpp")
plot(elecsales,ylab="Electricity", xlab="Year")

# Centred moving averages of annual electricity sales.
# NOTE(review): ma.1 uses order = 5 but the overlaid lines use orders 3
# (red) and 9 (blue) — the order-5 smoother is printed below yet never
# plotted; confirm which orders were intended.
ma.1 <- ma(elecsales,order=5)
plot(elecsales, main="Electricity",
     ylab="GWh", xlab="Year")
lines(ma(elecsales,3),col="red")
lines(ma(elecsales,9),col="blue")

print(ma.1)
## Time Series:
## Start = 1989 
## End = 2008 
## Frequency = 1 
##  [1]       NA       NA 2381.530 2424.556 2463.758 2552.598 2627.700 2750.622
##  [9] 2858.348 3014.704 3077.300 3144.520 3188.700 3202.320 3216.940 3307.296
## [17] 3398.754 3485.434       NA       NA
print(elecsales)
## Time Series:
## Start = 1989 
## End = 2008 
## Frequency = 1 
##  [1] 2354.34 2379.71 2318.52 2468.99 2386.09 2569.47 2575.72 2762.72 2844.50
## [10] 3000.70 3108.10 3357.50 3075.70 3180.60 3221.60 3176.20 3430.60 3527.48
## [19] 3637.89 3655.00

Lecture 3.a

# Annual oil production; restrict to 1996-2007 for the smoothing examples.
data(oil, package = "fpp")
plot(oil)

oildata <- window(oil,start=1996,end=2007)
plot(oildata, ylab="Oil (millions of tonnes)", xlab="Year")

Simple exponential smoothing

# lo=y1 or initial=optimal

# Simple exponential smoothing with fixed alpha: a larger alpha puts more
# weight on recent observations.
fit1 <- ses(oildata, alpha=0.2, initial="simple", h=3)

fit2 <- ses(oildata, alpha=0.6, initial="simple", h=3)

# fit3 <- ses(oildata, h=3)  # alpha chosen by optimisation

Plots for alpha = 0.2, 0.6 and 0.89 (0.89 being the optimised value from the commented-out fit3)

# NOTE(review): newer forecast versions renamed `plot.conf` to `PI` — hence
# the "not a graphical parameter" warnings below; the argument is ignored,
# so switch to PI = FALSE if intervals should be suppressed. Confirm the
# installed forecast version.
plot(fit1, plot.conf=FALSE, ylab="Oil (millions of tonnes)",
     xlab="Year", main="", fcol="white", type="o")
## Warning in plot.window(xlim, ylim, log, ...): "plot.conf" is not a graphical
## parameter
## Warning in title(main = main, xlab = xlab, ylab = ylab, ...): "plot.conf" is not
## a graphical parameter
## Warning in axis(1, ...): "plot.conf" is not a graphical parameter
## Warning in axis(2, ...): "plot.conf" is not a graphical parameter
## Warning in box(...): "plot.conf" is not a graphical parameter
# Overlay fitted values and point forecasts for each alpha.
lines(fitted(fit1), col="blue", type="o")
lines(fitted(fit2), col="red", type="o")
# lines(fitted(fit3), col="green", type="o")
lines(fit1$mean, col="blue", type="o")
lines(fit2$mean, col="red", type="o")
# lines(fit3$mean, col="green", type="o")
# FIX: the original legend advertised four series including a green
# alpha = 0.89 entry, but fit3 is commented out above, so nothing green is
# drawn. List only the series actually plotted.
legend("topleft",lty=1, col=c(1,"blue","red"),
       c("data", expression(alpha == 0.2), expression(alpha == 0.6)),pch=1)

oildata
## Time Series:
## Start = 1996 
## End = 2007 
## Frequency = 1 
##  [1] 446.6565 454.4733 455.6630 423.6322 456.2713 440.5881 425.3325 485.1494
##  [9] 506.0482 526.7920 514.2689 494.2110

Level lt for each alpha

fit1$model$state
## Time Series:
## Start = 1995 
## End = 2007 
## Frequency = 1 
##              l
##  [1,] 446.6565
##  [2,] 446.6565
##  [3,] 448.2199
##  [4,] 449.7085
##  [5,] 444.4932
##  [6,] 446.8489
##  [7,] 445.5967
##  [8,] 441.5439
##  [9,] 450.2650
## [10,] 461.4216
## [11,] 474.4957
## [12,] 482.4503
## [13,] 484.8025
fit2$model$state
## Time Series:
## Start = 1995 
## End = 2007 
## Frequency = 1 
##              l
##  [1,] 446.6565
##  [2,] 446.6565
##  [3,] 451.3466
##  [4,] 453.9364
##  [5,] 435.7539
##  [6,] 448.0644
##  [7,] 443.5786
##  [8,] 432.6309
##  [9,] 464.1420
## [10,] 489.2857
## [11,] 511.7895
## [12,] 513.2771
## [13,] 501.8375
# fit3$model$state
# fitted(fit1)
# fitted(fit2)
# fitted(fit3)

Summary of results

summary(fit1)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = oildata, h = 3, initial = "simple", alpha = 0.2) 
## 
##   Smoothing parameters:
##     alpha = 0.2 
## 
##   Initial states:
##     l = 446.6565 
## 
##   sigma:  32.1348
## Error measures:
##                    ME    RMSE      MAE      MPE     MAPE     MASE      ACF1
## Training set 15.89414 32.1348 24.66102 3.010684 5.067472 1.136664 0.5102479
## 
## Forecasts:
##      Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2008       484.8025 443.6201 525.9849 421.8194 547.7855
## 2009       484.8025 442.8045 526.8004 420.5721 549.0328
## 2010       484.8025 442.0045 527.6005 419.3486 550.2564
summary(fit2)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = oildata, h = 3, initial = "simple", alpha = 0.6) 
## 
##   Smoothing parameters:
##     alpha = 0.6 
## 
##   Initial states:
##     l = 446.6565 
## 
##   sigma:  25.9784
## Error measures:
##                    ME     RMSE      MAE      MPE     MAPE      MASE      ACF1
## Training set 7.664019 25.97844 20.17946 1.406176 4.239181 0.9301025 0.1642665
## 
## Forecasts:
##      Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2008       501.8375 468.5447 535.1302 450.9206 552.7543
## 2009       501.8375 463.0118 540.6631 442.4588 561.2162
## 2010       501.8375 458.1745 545.5004 435.0607 568.6142
# summary(fit3)

Lecture 3.b

# Australian air passengers; Holt's linear trend with fixed smoothing
# parameters, 5-step-ahead forecasts on the 1990-2004 window.
data(ausair, package = "fpp")
#original data - from 1970 until 2010
plot(ausair) 

air <- window(ausair,start=1990,end=2004)
fit1 <- holt(air, alpha=0.8, beta=0.2, initial="simple", h=5) 
# Exponential (multiplicative) trend variant of Holt's method.
fit2 <- holt(air, alpha=0.8, beta=0.2, initial="simple", exponential=TRUE, h=5) 
# fit3 <- holt(air, alpha=0.8, beta=0.2, damped=TRUE, initial="simple", h=5) 
plot(air,ylab="Air passengers", xlab="Year")

air
## Time Series:
## Start = 1990 
## End = 2004 
## Frequency = 1 
##  [1] 17.55340 21.86010 23.88660 26.92930 26.88850 28.83140 30.07510 30.95350
##  [9] 30.18570 31.57970 32.57757 33.47740 39.02158 41.38643 41.59655
#print(air)

Results for first model

#level lt and trend bt
fit1$model$state 
## Time Series:
## Start = 1989 
## End = 2004 
## Frequency = 1 
##             l        b
## 1989 17.55340 4.306700
## 1990 18.41474 3.617628
## 1991 21.89455 3.590065
## 1992 24.20620 3.334382
## 1993 27.05156 3.236576
## 1994 27.56843 2.692635
## 1995 29.11733 2.463889
## 1996 30.37632 2.222910
## 1997 31.28265 1.959592
## 1998 30.79701 1.470546
## 1999 31.71727 1.360489
## 2000 32.67761 1.280459
## 2001 33.57353 1.203552
## 2002 38.17268 1.882672
## 2003 41.12022 2.095644
## 2004 41.92041 1.836555
#Fitted values (lt+bt)
fitted(fit1)
## Time Series:
## Start = 1990 
## End = 2004 
## Frequency = 1 
##  [1] 21.86010 22.03237 25.48462 27.54059 30.28813 30.26106 31.58122 32.59923
##  [9] 33.24224 32.26755 33.07776 33.95807 34.77708 40.05535 43.21586

Forecasts

fit1$mean
## Time Series:
## Start = 2005 
## End = 2009 
## Frequency = 1 
## [1] 43.75697 45.59352 47.43008 49.26663 51.10319

Plots

# NOTE(review): newer forecast versions renamed `plot.conf` to `PI` — hence
# the "not a graphical parameter" warnings below; confirm and switch to
# PI = FALSE if intervals should be suppressed.
plot(fit2, type="o", ylab="Air passengers", xlab="Year", 
     fcol="white", plot.conf=FALSE, main="")
## Warning in plot.window(xlim, ylim, log, ...): "plot.conf" is not a graphical
## parameter
## Warning in title(main = main, xlab = xlab, ylab = ylab, ...): "plot.conf" is not
## a graphical parameter
## Warning in axis(1, ...): "plot.conf" is not a graphical parameter
## Warning in axis(2, ...): "plot.conf" is not a graphical parameter
## Warning in box(...): "plot.conf" is not a graphical parameter
# Overlay fitted values (lines) and forecasts (points) for each model.
lines(fitted(fit1), col="blue") 
lines(fitted(fit2), col="red")
# lines(fitted(fit3), col="green")
lines(fit1$mean, col="blue", type="o") 
lines(fit2$mean, col="red", type="o")
# lines(fit3$mean, col="green", type="o")
# FIX: the original legend listed four entries including "Additive damped
# trend" (green), but fit3 is commented out above, so no green series is
# drawn. List only the series actually plotted.
legend("topleft", lty=1, col=c("black","blue","red"), 
       c("Data","Holt's linear trend","Exponential trend"))

Forecast results

summary(fit1)
## 
## Forecast method: Holt's method
## 
## Model Information:
## Holt's method 
## 
## Call:
##  holt(y = air, h = 5, initial = "simple", alpha = 0.8, beta = 0.2) 
## 
##   Smoothing parameters:
##     alpha = 0.8 
##     beta  = 0.2 
## 
##   Initial states:
##     l = 17.5534 
##     b = 4.3067 
## 
##   sigma:  2.2029
## Error measures:
##                     ME     RMSE      MAE       MPE     MAPE     MASE     ACF1
## Training set -1.029227 2.202869 1.772637 -4.485612 6.364749 0.967131 0.208894
## 
## Forecasts:
##      Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2005       43.75697 40.93388 46.58006 39.43943 48.07451
## 2006       45.59352 41.60107 49.58598 39.48760 51.69945
## 2007       47.43008 42.19403 52.66613 39.42223 55.43793
## 2008       49.26663 42.70637 55.82690 39.23357 59.29970
## 2009       51.10319 43.13827 59.06810 38.92190 63.28448
summary(fit2)
## 
## Forecast method: Holt's method with exponential trend
## 
## Model Information:
## Holt's method with exponential trend 
## 
## Call:
##  holt(y = air, h = 5, initial = "simple", exponential = TRUE,  
## 
##  Call:
##      alpha = 0.8, beta = 0.2) 
## 
##   Smoothing parameters:
##     alpha = 0.8 
##     beta  = 0.2 
## 
##   Initial states:
##     l = 17.5534 
##     b = 1.2453 
## 
##   sigma:  0.0961
## Error measures:
##                     ME     RMSE     MAE       MPE     MAPE     MASE      ACF1
## Training set -2.010092 2.908422 2.56568 -7.724285 9.137707 1.399806 0.3109081
## 
## Forecasts:
##      Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2005       44.59872 38.90714 50.02901 35.83578 53.13567
## 2006       47.24164 38.93871 55.46601 35.21957 60.08140
## 2007       50.04117 38.97789 61.87087 34.03743 69.72935
## 2008       53.00661 38.59431 69.19751 33.11040 79.65168
## 2009       56.14778 38.49502 77.80103 32.00086 91.00085
# summary(fit3)

Lecture 3.c

# Australian tourist data; training window from 2005 onwards.
data(austourists, package = "fpp")
#original data - from 1970 until 2010
plot(austourists, ylab="Tourists", xlab="Year") 

aust <- window(austourists,start=2005)
#print(aust)

Fitted values (lt+bt)

Forecasts

Forecast results

Lecture 3.d

data(ausbeer, package = "fpp")
plot(ausbeer, ylab="Beer", xlab="Year") 

ets

Lecture 4.0

# Dow Jones series. The warning below shows data() did not find 'dj' in
# this fma version; plot(dj) still works — presumably because `dj` is
# lazy-loaded when fma is attached. Confirm.
data("dj", package = "fma")
## Warning in data("dj", package = "fma"): data set 'dj' not found
par(mfrow=c(1,1))  # single plotting panel
plot(dj)

# Log returns: first difference of the log level.
retdj<-diff(log(dj))
plot(retdj)

acf(dj, lag.max = 30, na=na.omit, main="", ylab="ACF", xlab="Lag")

acf

#acf(retdj, lag.max = 30, na.action = na.omit, main="", ylab="ACF", xlab="Lag",ylim=c(-0.2,0.2))
# ACF of the returns; use the full argument name `na.action` instead of the
# partially-matched `na =`
acf(retdj, lag.max = 30, na.action = na.omit, main = "", ylab = "ACF", xlab = "Lag")

# Simulate an AR(2): y_t = 0.9*y_{t-1} - 0.2*y_{t-2} + e_t, shifted by 0.3.
# NOTE(review): no set.seed() here, so these draws are not reproducible;
# the two arima.sim() calls share one RNG stream — do not reorder them.
ar2<-arima.sim(model=list(ar=c(.9,-.2)),n=100) + 0.3
# Simulate an AR(1) with coefficient 0.8
ar1<-arima.sim(model=list(ar=c(.8)),n=100) 
plot(ar2)

plot(ar1)

# AR(1): ACF decays geometrically, PACF should cut off after lag 1
acf(ar1)

pacf(ar1)

# AR(2): PACF should cut off after lag 2
acf(ar2)

pacf(ar2)

# Simulate an MA(2): e_t - 0.7*e_{t-1} + 0.1*e_{t-2}.
# NOTE(review): unseeded draws — not reproducible; keep statement order, the
# two simulations consume the same RNG stream.
ma2<-arima.sim(model=list(ma=c(-.7,.1)),n=100)
# Simulate an MA(1) with coefficient 0.4, shifted by 0.1
ma1<-arima.sim(model=list(ma=c(.4)),n=100) + 0.1
plot(ma1)

plot(ma2)

# MA(1): ACF should cut off after lag 1, PACF decays
acf(ma1, lag.max = 40)

pacf(ma1,lag.max = 40)

# MA(2): ACF should cut off after lag 2
acf(ma2)

pacf(ma2)

# White noise vs. random walk; the seed makes both simulations reproducible
set.seed(123)

# White noise: 500 iid standard-normal draws (mean 0, sd 1 are the defaults);
# its sample ACF should be near zero at every lag
wn <- rnorm(500)
plot(wn, xlab = "time", ylab = "wn", type = "l")

acf(wn)

# Random walk: cumulative sum of standard-normal innovations; its ACF decays
# very slowly, the classic signature of a unit root
rw <- cumsum(rnorm(500))
plot(rw, xlab = "time", ylab = "rw", type = "l")

acf(rw)

Lecture 4.a

# data("dj", package = "fma")

Check (non)stationarity

# Levels of the Dow Jones: the ACF dies out very slowly, pointing to
# non-stationarity in levels
plot(dj)

acf(dj)

pacf(dj)

Augmented Dickey-Fuller

Augmented Dickey-Fuller test (`ur.df` from the urca package)

# Augmented Dickey-Fuller regression with no constant or trend and one lagged
# difference; compare the test statistic against the tau1 critical values
adf <- ur.df(dj, type = "none", lags = 1)
summary(adf)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -97.259 -13.188   1.136  13.392  69.175 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## z.lag.1    0.0001629  0.0003534   0.461    0.645
## z.diff.lag 0.0484155  0.0591327   0.819    0.414
## 
## Residual standard error: 22.59 on 288 degrees of freedom
## Multiple R-squared:  0.003165,   Adjusted R-squared:  -0.003757 
## F-statistic: 0.4573 on 2 and 288 DF,  p-value: 0.6335
## 
## 
## Value of test-statistic is: 0.461 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62

# tseries::adf.test with its default lag choice, as a cross-check on ur.df
adf.test(dj)

# Log-returns (recomputed here; identical to the `retdj` defined earlier)
retdj <- diff(log(dj))
plot(retdj)

acf(retdj)

pacf(retdj)

ARIMA(0,1,0)

# Random-walk fit: ARIMA(0,1,0), i.e. first differences with no ARMA terms.
# NOTE(review): the name `dj.ar1` is misleading — this is not an AR(1).
dj.ar1 <- arima(dj, c(0, 1, 0))
summary(dj.ar1) #
## 
## Call:
## arima(x = dj, order = c(0, 1, 0))
## 
## 
## sigma^2 estimated as 506.6:  log likelihood = -1319.04,  aic = 2638.09
## 
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample
##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN
acf(residuals(dj.ar1))

# Ljung-Box portmanteau test on the residuals; fitdf = 0 because ARIMA(0,1,0)
# estimates no ARMA coefficients. p-value 0.17 > 0.05: no evidence of
# remaining residual autocorrelation.
Box.test(residuals(dj.ar1), lag=20, fitdf=0, type="Ljung")
## 
##  Box-Ljung test
## 
## data:  residuals(dj.ar1)
## X-squared = 25.772, df = 20, p-value = 0.1735
tsdiag(dj.ar1) 

# plot(forecast(dj.ar1)) #

AR(1)

# AR(1) fitted to the levels, with intercept; the estimated ar1 coefficient
# (0.977 in the output below) is very close to a unit root
dj_ar1 <- arima(dj, c(1, 0, 0))
summary(dj_ar1)
## 
## Call:
## arima(x = dj, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.9770  3754.1925
## s.e.  0.0117    50.1831
## 
## sigma^2 estimated as 500.7:  log likelihood = -1323.42,  aic = 2650.83
## 
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample
##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN
acf(residuals(dj_ar1))

# Ljung-Box on the AR(1) residuals; fitdf = 1 accounts for the single
# estimated AR coefficient
Box.test(residuals(dj_ar1), lag=20, fitdf=1, type="Ljung")
## 
##  Box-Ljung test
## 
## data:  residuals(dj_ar1)
## X-squared = 24.932, df = 19, p-value = 0.1628
tsdiag(dj_ar1) 

# plot(forecast(dj_ar1))

# One-step-ahead forecast. predict.Arima() only understands `n.ahead` (plus
# `newxreg`/`se.fit`); the previous `level =` and `interval =` arguments
# belong to predict.lm() and were silently absorbed by `...`, so they are
# dropped here. n.ahead = 1 is the default and yields identical output.
pred.2 <- predict(dj_ar1, n.ahead = 1)
pred.2
## $pred
## Time Series:
## Start = 293 
## End = 293 
## Frequency = 1 
## [1] 3852.685
## 
## $se
## Time Series:
## Start = 293 
## End = 293 
## Frequency = 1 
## [1] 22.37663

Lecture 4.b

# Load the saved US GDP growth object `usgdp5190` (presumably log-differenced
# quarterly US GDP, 1951-1990 — TODO confirm against the .rda file)
load("usgdp5190.rda")
dlgdp <- usgdp5190

Check (non)stationarity

# Plot the growth series and its ACF/PACF to judge stationarity and
# candidate ARMA orders
plot(dlgdp)

acf(dlgdp)

pacf(dlgdp)

# Box-Pierce portmanteau test: joint test of the first 10 autocorrelations
Box.test(dlgdp, 10)
## 
##  Box-Pierce test
## 
## data:  dlgdp
## X-squared = 32.267, df = 10, p-value = 0.0003614
# Ljung-Box variant (finite-sample correction); both versions clearly reject
# the white-noise null
Box.test(dlgdp, 10, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  dlgdp
## X-squared = 33.167, df = 10, p-value = 0.0002553
# Automatic AR order selection with stats::ar (Yule-Walker, AIC); picks order 3
ar(dlgdp)
## 
## Call:
## ar(x = dlgdp)
## 
## Coefficients:
##       1        2        3  
##  0.3323   0.1258  -0.1523  
## 
## Order selected 3  sigma^2 estimated as  8.866e-05
# AR(3) with intercept, matching the order chosen by ar() above.
# NOTE(review): the name `gdp.ar1` is misleading — the model is an AR(3).
gdp.ar1 <- arima(dlgdp, c(3, 0, 0))
summary(gdp.ar1)
## 
## Call:
## arima(x = dlgdp, order = c(3, 0, 0))
## 
## Coefficients:
##          ar1     ar2      ar3  intercept
##       0.3351  0.1286  -0.1504     0.0074
## s.e.  0.0781  0.0821   0.0789     0.0011
## 
## sigma^2 estimated as 8.619e-05:  log likelihood = 521.58,  aic = -1035.17
## 
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample
##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN
# More parsimonious AR(1); the trailing comment indicates this is the order
# preferred by BIC
gdp.ar2 <- arima(dlgdp, c(1, 0, 0)) #BIC
summary(gdp.ar2)
## 
## Call:
## arima(x = dlgdp, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.3518     0.0075
## s.e.  0.0744     0.0011
## 
## sigma^2 estimated as 8.879e-05:  log likelihood = 519.24,  aic = -1034.49
## 
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample
##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN
# Residual diagnostics for the AR(1)
tsdiag(gdp.ar2) 

Diagnostics

# Residual diagnostics for the AR(3)
tsdiag(gdp.ar1) 

Automatic modelling: AR models

# Exhaustive BIC search over pure AR specifications: q is forced to 0,
# p may go up to 12, and the stepwise/approximation shortcuts are disabled
# so every candidate model is evaluated exactly
gdpAR <- auto.arima(
  dlgdp,
  d = 0, D = 0,
  max.p = 12, max.q = 0,
  max.P = 0, max.Q = 0,
  max.order = 12,
  ic = "bic",
  stepwise = FALSE,
  approximation = FALSE
)
summary(gdpAR)
## Series: dlgdp 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1    mean
##       0.3518  0.0075
## s.e.  0.0744  0.0011
## 
## sigma^2 estimated as 8.991e-05:  log likelihood=519.24
## AIC=-1032.49   AICc=-1032.33   BIC=-1023.26
## 
## Training set error measures:
##                         ME        RMSE         MAE      MPE     MAPE      MASE
## Training set -3.452183e-05 0.009422832 0.007413909 91.89917 536.0133 0.6298816
##                     ACF1
## Training set -0.02960257

Automatic modelling: ARMA models

# Exhaustive AIC search over ARMA(p,q) specifications.
# NOTE(review): max.order = 6 caps p + q at 6 even though max.p and max.q
# allow 12 each — confirm this cap is intended.
gdpARMA <- auto.arima(dlgdp, d=0, D=0, max.p=12, max.q=12, max.P=0, max.Q=0, 
                    max.order=6, ic="aic", stepwise=FALSE, approximation=FALSE)

Check with BIC

summary(gdpARMA)
## Series: dlgdp 
## ARIMA(0,0,2) with non-zero mean 
## 
## Coefficients:
##          ma1     ma2    mean
##       0.3211  0.2240  0.0075
## s.e.  0.0774  0.0716  0.0011
## 
## sigma^2 estimated as 8.86e-05:  log likelihood=520.91
## AIC=-1033.82   AICc=-1033.56   BIC=-1021.52
## 
## Training set error measures:
##                         ME        RMSE         MAE      MPE     MAPE      MASE
## Training set -3.066226e-05 0.009323987 0.007358365 79.75756 403.7583 0.6251627
##                     ACF1
## Training set 0.007654661
tsdiag(gdpARMA)

# Re-fit the selected MA(2) with stats::arima to get the classical summary
gdp.ma2 <- arima(dlgdp, c(0, 0, 2))
summary(gdp.ma2)
## 
## Call:
## arima(x = dlgdp, order = c(0, 0, 2))
## 
## Coefficients:
##          ma1     ma2  intercept
##       0.3211  0.2240     0.0075
## s.e.  0.0774  0.0716     0.0011
## 
## sigma^2 estimated as 8.694e-05:  log likelihood = 520.91,  aic = -1035.82
## 
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample
##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN

Diagnostics

tsdiag(gdp.ma2) 

Lecture 4.c

# Load the saved 3-month Treasury-bill object `tb3m5605` (presumably monthly,
# 1956-2005 — TODO confirm against the .rda file)
load("tb3m5605.rda")
tbill <- tb3m5605

Check (non)stationarity

# Levels of the T-bill rate: highly persistent, slowly-decaying ACF
plot(tbill)

acf(tbill)

# Log-differences (approximately percentage changes) of the rate
dltb <- diff(log(tbill))
plot(dltb)

acf(dltb)

# ACF of the squared changes: checks dependence in the variance
# (volatility clustering), relevant for the volatility lectures
acf(dltb^2)

pacf(dltb)

Which ARMA model to choose?

# forecast::tsdisplay shows the series together with its ACF and PACF
tsdisplay(dltb) # forecast library

Use BIC (try also a pure AR)

# Exhaustive BIC search over ARMA(p,q) for the T-bill log-differences;
# stepwise/approximation shortcuts disabled so all candidates are fitted
tbBIC <- auto.arima(
  dltb,
  d = 0, D = 0,
  max.p = 10, max.q = 10,
  max.P = 0, max.Q = 0,
  max.order = 10,
  ic = "bic",
  stepwise = FALSE,
  approximation = FALSE
)
summary(tbBIC)
## Series: dltb 
## ARIMA(0,0,1) with zero mean 
## 
## Coefficients:
##          ma1
##       0.4643
## s.e.  0.0353
## 
## sigma^2 estimated as 0.004523:  log likelihood=767.3
## AIC=-1530.6   AICc=-1530.58   BIC=-1521.81
## 
## Training set error measures:
##                        ME       RMSE        MAE MPE MAPE    MASE       ACF1
## Training set 0.0005488718 0.06719796 0.04427584 NaN  Inf 0.63517 0.03323095

Diagnostics

tsdiag(tbBIC)

Use AIC

# AIC search with total order capped at 5; d is not fixed here, so the amount
# of differencing is chosen by auto.arima's internal unit-root test
tbAIC1 <- auto.arima(dltb, max.order=5, ic="aic", stepwise=FALSE, approximation=FALSE)
summary(tbAIC1)
## Series: dltb 
## ARIMA(0,0,5) with zero mean 
## 
## Coefficients:
##          ma1     ma2     ma3     ma4     ma5
##       0.5026  0.1297  0.1138  0.0763  0.1031
## s.e.  0.0404  0.0481  0.0549  0.0547  0.0428
## 
## sigma^2 estimated as 0.004466:  log likelihood=773.1
## AIC=-1534.21   AICc=-1534.06   BIC=-1507.83
## 
## Training set error measures:
##                        ME       RMSE        MAE MPE MAPE      MASE         ACF1
## Training set 0.0004302795 0.06654573 0.04381586 NaN  Inf 0.6285713 -0.005389613

Diagnostics

tsdiag(tbAIC1)

Try tbAIC also with max.order=12

# Pure MA search (max.p = 0) with a larger cap of 12; AIC selects MA(7)
tbAIC2 <- auto.arima(dltb, d=0, D=0, max.p=0, max.q=12, max.P=0, max.Q=0,
                    max.order=12, ic="aic", stepwise=FALSE, approximation=FALSE)
summary(tbAIC2)
## Series: dltb 
## ARIMA(0,0,7) with zero mean 
## 
## Coefficients:
##          ma1     ma2     ma3     ma4     ma5      ma6      ma7
##       0.4787  0.1051  0.1027  0.0641  0.0731  -0.1177  -0.1160
## s.e.  0.0405  0.0447  0.0450  0.0463  0.0443   0.0431   0.0394
## 
## sigma^2 estimated as 0.004402:  log likelihood=778.38
## AIC=-1540.76   AICc=-1540.51   BIC=-1505.59
## 
## Training set error measures:
##                        ME     RMSE        MAE MPE MAPE      MASE        ACF1
## Training set 0.0004828888 0.065956 0.04376194 NaN  Inf 0.6277977 0.005586375

Diagnostics

tsdiag(tbAIC2)

acf(residuals(tbAIC2))

# Ljung-Box on the MA(7) residuals; fitdf = 7 removes the seven estimated
# MA coefficients from the test's degrees of freedom
Box.test(residuals(tbAIC2), lag=20, fitdf=7, type="Ljung")
## 
##  Box-Ljung test
## 
## data:  residuals(tbAIC2)
## X-squared = 13.909, df = 13, p-value = 0.3803

Exclude non-statistically significant coefficients

#NA means that the coefficient is not fixed and is estimated; last coefficient 
#refers to the constant
#fl<-structure(list(mean=K,x=Dem2,fitted=tbMA0$fitted),class="forecast")
# Restricted MA(7): ma4, ma5 and the intercept are pinned at zero (the three
# zeros in `fixed`); the NA entries are re-estimated freely
tbMA0 <- arima(dltb, order = c(0,0,7),
               fixed = c(NA, NA, NA, 0, 0, NA, NA, 0))

# summary(tbMA)
# tsdiag(tbMA)

summary(tbMA0)
## 
## Call:
## arima(x = dltb, order = c(0, 0, 7), fixed = c(NA, NA, NA, 0, 0, NA, NA, 0))
## 
## Coefficients:
##          ma1     ma2     ma3  ma4  ma5      ma6      ma7  intercept
##       0.4798  0.0983  0.0766    0    0  -0.1386  -0.1145          0
## s.e.  0.0405  0.0425  0.0391    0    0   0.0406   0.0396          0
## 
## sigma^2 estimated as 0.004375:  log likelihood = 776.7,  aic = -1543.4
## 
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample
##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN
tsdiag(tbMA0)

# 30-step-ahead forecasts from the MA(7); beyond horizon 7 the point forecast
# collapses to the (zero) mean, as the table below shows
dltb_fc <- forecast(tbAIC2, h = 30)
dltb_fc
##          Point Forecast       Lo 80      Hi 80      Lo 95     Hi 95
## Jan 2006  -0.0027281764 -0.08775246 0.08229610 -0.1327616 0.1273052
## Feb 2006  -0.0083551216 -0.10261761 0.08590737 -0.1525172 0.1358069
## Mar 2006   0.0038276047 -0.09085740 0.09851261 -0.1409806 0.1486358
## Apr 2006  -0.0069838281 -0.10207053 0.08810287 -0.1524064 0.1384387
## May 2006  -0.0094284306 -0.10467128 0.08581442 -0.1550898 0.1362329
## Jun 2006   0.0002565358 -0.09518897 0.09570204 -0.1457148 0.1462278
## Jul 2006   0.0002427855 -0.09572582 0.09621139 -0.1465285 0.1470141
## Aug 2006   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Sep 2006   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Oct 2006   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Nov 2006   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Dec 2006   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Jan 2007   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Feb 2007   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Mar 2007   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Apr 2007   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## May 2007   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Jun 2007   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Jul 2007   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Aug 2007   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Sep 2007   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Oct 2007   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Nov 2007   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Dec 2007   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Jan 2008   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Feb 2008   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Mar 2008   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Apr 2008   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## May 2008   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
## Jun 2008   0.0000000000 -0.09647417 0.09647417 -0.1475445 0.1475445
#plot(gdp_fc, shaded = FALSE)
#plot(gdp_fc, shadecols = grey(c(0.8, 0.6)))
# Fan chart of the forecasts with the 80% and 95% intervals
plot(dltb_fc)

Lecture 4.d

Data

# Reload the US GDP growth series already used in Lecture 4.b
load("usgdp5190.rda")
dlgdp <- usgdp5190

Check (non)stationarity

plot(dlgdp)

acf(dlgdp)

pacf(dlgdp)

# Rescale growth to percentage points so coefficients are easier to read
gdp <- 100*dlgdp
plot(gdp)

# AR(1) on the rescaled series.
# NOTE(review): this overwrites the AR(3) object `gdp.ar1` from Lecture 4.b.
gdp.ar1 <- arima(gdp, c(1, 0, 0))
summary(gdp.ar1)
## 
## Call:
## arima(x = gdp, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.3518     0.7460
## s.e.  0.0744     0.1145
## 
## sigma^2 estimated as 0.8879:  log likelihood = -217.58,  aic = 439.17
## 
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample
##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN

Predict

Plot

Close the log file

# Report the total run time of the script (start_time is set in the setup chunk)
end_time <- Sys.time()
end_time - start_time
## Time difference of 1.116595 mins
  # sprintf(end_time - start_time,fmt = '%#.1f')
#